/**********************************************************************/
/*
   Author: Karan Makkar
   Created: Feb  2024
   Modification Date: Aug 2025, by Youssef Assarssah
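   Description: Merge the March and September 2020 Susenas rounds and save industry shocks
   Outputs:
   marchsep_sus_merged
   provid_java_crosswalk
   sus_marsep20_ind26shocks_java
   $KP_deid_susenas/Clean/sus_marsep20_leaveoutind26shocks_java
   (the shock and leave-out shock files are also written for the 10-category
   industry code, without the _java suffix, and with an _hh suffix)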
*/
/**********************************************************************/

/*----------------------------------------------------*/
* Section: Setup
/*----------------------------------------------------*/

* Pull in the filepath definitions, unless the global master_run equals 1
* (presumably set by a master do-file that already loaded them - confirm)
if "$master_run" != "1" include "./Do/SET_FILEPATHS.do"

* Start from an empty session with fixed settings; the sort seed is pinned
* so that sorts with ties are reproducible across runs
clear all
set sortseed 123456
set matsize 11000
set more off

* Open a text log whose name starts with the current date (format CCYYNNDD)
capture log close
global prefix: display %tdCYND td(`c(current_date)')
log using "$KP_logs/${prefix}_merge_marchsep20.txt", text replace

/*----------------------------------------------------*/
* Section: Create Merged Dataset
/*----------------------------------------------------*/
* Builds a person-level panel by matching March 2020 respondents to
* September 2020 respondents on the September household id plus date of
* birth and gender, then saves it only if the data signature matches.

use "${KP_deid_susenas}/Clean/sus_mar20_deid_clean.dta", clear

/************
* Merge in Sep HHids to March
***********/

* Keep obs unique on HH-ID, dob, year, month, gender:
* drop rows with any missing key, then drop every row of a duplicated key
drop if mi(renum, year_dob_sus, month_dob_sus, day_dob_sus, gender)
duplicates tag renum year_dob_sus month_dob_sus day_dob_sus gender, gen(dups)
drop if dups != 0

rename renum renumMar

* Household-level industry: ind_code of the first remaining member once the
* household is sorted by r403 then r406c.
* The household id is referenced by its full new name (renumMar); the bare
* name renum only resolved through variable-name abbreviation.
bysort renumMar (r403 r406c): gen ind_code_hh = ind_code[1]

* Attach the September household id; keep only March households found in idpanel
fmerge m:1 renumMar using "${KP_deid_susenas}/Raw/idpanel", assert(2 3) nogen keep(3) keepusing(renumSep)

drop if mi(renumSep)

tempfile mar20
save `mar20', replace

/************
* Merge march and Sep Data
***********/
use "${KP_deid_susenas}/Clean/sus_sep20_deid_clean.dta", clear

rename RENUM renum

* Keep obs unique on HH-ID, dob, year, month, gender (same rule as for March)
drop if mi(renum, year_dob_sus, month_dob_sus, day_dob_sus, gender)
duplicates tag renum year_dob_sus month_dob_sus day_dob_sus gender, gen(dups)
drop if dups != 0

* September variables get a Sep suffix so they do not clash with the March ones
keep renum year_dob_sus month_dob_sus day_dob_sus gender get_pk anon_id4 age_sus employed tot_adj_cons
rename renum renumSep
rename employed employedSep
rename tot_adj_cons tot_adj_consSep

* NOTE(review): with the update option matched rows can also be coded 4 or 5;
* keep(3) is left untouched because the data signature below pins the
* resulting sample - confirm that this is the intended selection.
merge 1:1 renumSep year_dob_sus month_dob_sus day_dob_sus gender using `mar20', nogen update keep(3) keepusing(anon_id4 renumMar employed tot_adj_cons ind_code ind_code_hh age r101)

rename r101 prov_id

* Keep a single row per non-missing anon_id4
duplicates drop anon_id4 if !mi(anon_id4), force

* Save only when the dataset reproduces the expected signature
datasignature
if "`r(datasignature)'" == "231540:17(46456):1615490520:3379984262" {
    save "${KP_deid_susenas}/Clean/marchsep_sus_merged.dta", replace
}
else {
    di as err "Careful, your machine produces a different dataset"
    * Abort with a proper return code instead of relying on an unknown command
    error 459
}
 
/*----------------------------------------------------*/
* Section: Create Industry Shocks
/*----------------------------------------------------*/

* Create java dummy: 1 when prov_id lies in 31-36, 0 otherwise
* (a missing prov_id therefore also gets 0)
gen java = inrange(prov_id, 31, 36)

* Save provid_java_crosswalk: one row per distinct (java, prov_id) pair
preserve
keep java prov_id
duplicates drop
sort prov_id
save "${KP_deid_susenas}/Raw/provid_java_crosswalk.dta", replace
restore

* Restrict to adults. Stata treats missing as larger than any number, so a
* bare age >= 18 would also keep rows with missing age; exclude them explicitly.
keep if age >= 18 & !mi(age)

* Add Log Vars (non-positive consumption yields missing logs)
gen l_tot_adj_cons = ln(tot_adj_cons)
gen l_tot_adj_consSep = ln(tot_adj_consSep)

* Recode industry code
* The outer loop runs twice: for the individual industry code (no suffix)
* and for the household industry code (suffix _hh).

* Parallel lists: short stub used in variable names, and the source variable
local stubs emp empSep cons consSep
local srcs employed employedSep tot_adj_cons tot_adj_consSep

foreach s in "" "_hh" {

   * Map the 26-category code to the coarser 10-level grouping; any value
   * not listed (including missing) stays missing
   gen ind_code10`s' = .
   replace ind_code10`s' = 11 if inlist(ind_code`s', 1, 2, 3, 4, 5, 6)
   replace ind_code10`s' = 10 if ind_code`s' == 7
   replace ind_code10`s' = 2 if ind_code`s' == 8
   replace ind_code10`s' = 7 if inlist(ind_code`s', 9, 10)
   replace ind_code10`s' = 6 if ind_code`s' == 11
   replace ind_code10`s' = 9 if inlist(ind_code`s', 12, 14)
   replace ind_code10`s' = 8 if ind_code`s' == 13
   replace ind_code10`s' = 3 if ind_code`s' == 15
   replace ind_code10`s' = 5 if inlist(ind_code`s', 16, 17, 18, 19)
   replace ind_code10`s' = 4 if inlist(ind_code`s', 20, 21, 22, 23, 24, 25, 26)

   * The detailed code is renamed so both groupings follow one naming pattern
   rename ind_code`s' ind_code26`s'

   * Industry Leaveout Means
   * j empty: cells are industries; j equal to java: cells are industry x java
   foreach j in "" "java" {
      local js ""
      if "`j'" == "java" local js "_java"

      foreach n in 10 26 {

         * Cell definition and the common suffix of every variable built below
         local grp ind_code`n'`s' `j'
         local tag `n'`s'`js'

         * No Leaveout: cell means of March and September outcomes
         forvalues i = 1/4 {
            local stub : word `i' of `stubs'
            local src : word `i' of `srcs'
            bysort `grp': egen mean_`stub'`tag' = mean(`src')
         }
         gen lmean_cons`tag' = ln(mean_cons`tag')
         gen lmean_consSep`tag' = ln(mean_consSep`tag')

         * Shock = March cell mean minus September cell mean
         gen d_employed`tag' = mean_emp`tag' - mean_empSep`tag'
         gen d_tot_adj_cons`tag' = mean_cons`tag' - mean_consSep`tag'
         gen d_l_tot_adj_cons`tag' = lmean_cons`tag' - lmean_consSep`tag'

         * Leaveout: non-missing counts per cell, then the cell mean
         * recomputed without the own observation
         forvalues i = 1/4 {
            local stub : word `i' of `stubs'
            local src : word `i' of `srcs'
            bysort `grp': egen count_`stub'`tag' = count(`src')
         }
         forvalues i = 1/4 {
            local stub : word `i' of `stubs'
            local src : word `i' of `srcs'
            gen mean_`stub'l`tag' = (mean_`stub'`tag'*count_`stub'`tag' - `src')/(count_`stub'`tag'-1)
         }
         gen lmean_consl`tag' = ln(mean_consl`tag')
         gen lmean_consSepl`tag' = ln(mean_consSepl`tag')

         gen d_employedl`tag' = mean_empl`tag' - mean_empSepl`tag'
         gen d_tot_adj_consl`tag' = mean_consl`tag' - mean_consSepl`tag'
         gen d_l_tot_adj_consl`tag' = lmean_consl`tag' - lmean_consSepl`tag'

         * Save industry shocks data: one row per cell
         preserve
         bysort `grp': keep if _n == 1
         keep `grp' d_employed`tag' d_tot_adj_cons`tag' d_l_tot_adj_cons`tag'
         save "${KP_deid_susenas}/Clean/sus_marsep20_ind`n'shocks`js'`s'.dta", replace
         restore

         * Save leave-out shocks: one row per person, with the person keys
         preserve
         keep `grp' d_employedl`tag' d_tot_adj_consl`tag' d_l_tot_adj_consl`tag' renumMar year_dob_sus month_dob_sus day_dob_sus gender
         save "${KP_deid_susenas}/Clean/sus_marsep20_leaveoutind`n'shocks`js'`s'.dta", replace
         restore
      }
   }
}